* ------------------------------------------------------------------------------
* Session setup
* ------------------------------------------------------------------------------
set more off
clear all

cls

* Sample years. They are mapped to period 0, 1 and 2 during initial processing.
local year0 1980
local year1 1990
local year2 2018


* Sample selection (default: both flags equal to zero).
*   flag_hh_level=0, flag_married_only=0 : Tables 3 and 10
*   flag_hh_level=1, flag_married_only=0 : Table 11 (columns 1-4) and Table 12 (columns 1-2)
*   flag_hh_level=1, flag_married_only=1 : Table 11 (columns 5-8) and Table 12 (columns 3-4)
local flag_hh_level 0
local flag_married_only 0


* Pipeline switches.
*   all 0 : run the regressions from the pre-created TEMP_* files (much faster)
*   all 1 : re-create those files before running the regressions
local flag_initial_processing 0
local flag_step1 0
local flag_step2 0
local flag_step3 0
local flag_step4 0

* ------------------------------------------------------------------------------
* INITIAL PROCESSING
* For every sample year: load TEMP_data_<year>_processed, attach the CPI, pick
* the weight, build annual hours, employment, industry and demographic bins,
* rescale the weights and save TEMP_data_<year>_further_processed<suffix>.
* ------------------------------------------------------------------------------
if `flag_initial_processing'==1 {

	* Output-file suffix for the selected sample. flag_married_only takes
	* precedence over flag_hh_level, matching the file names read by the later steps.
	local sample_suffix ""
	if `flag_hh_level'==1 & `flag_married_only'==0 {
		local sample_suffix "_hh"
	}
	if `flag_married_only'==1 {
		local sample_suffix "_married"
	}

	foreach year of numlist `year0' `year1' `year2' {

		* NOTE(review): initial_processing.do is not visible here and is called
		* without arguments on every pass; confirm that re-running it for each
		* year is required.
		do initial_processing

		* clear is the documented option of use for discarding the data in memory
		use TEMP_data_`year'_processed, clear


		*add CPI; rows that come only from the price file have no serial and are dropped
		merge m:1 year using "../US_prices_4_regions/prices_US_rec_detailed"
		drop _merge
		drop if serial==.
		* keep price_CPI and discard every other price* series
		rename price_CPI aux
		drop price*
		rename aux price_CPI

		*pick weights to use: person weights by default, household weights for the household-level sample
		gen weight=perwt
		if `flag_hh_level'==1 {
			replace weight=hhwt
		}
		drop perwt hhwt

		gen period=.
		replace period=0 if year==`year0'
		replace period=1 if year==`year1'
		replace period=2 if year==`year2'


		***hours variables
		* drop records whose hours and wage income contradict each other
		* NOTE(review): a missing value compares as larger than any number, so a
		* missing incwage or uhrswork also satisfies the >0 conditions below;
		* confirm that missings are recoded upstream.
		drop if uhrswork==0 & incwage>0
		drop if incwage==0 & uhrswork>0

		*for later dates, wkswork1 is not available. interpolate using previous data.
		*numbers are from census_analysis. They are obtained from: reg wkswork1 i.wkswork2, nocons
		capture confirm variable wkswork1, exact
		if _rc==111 { // VARIABLE NOT FOUND
			gen wkswork1=.
		}
		replace wkswork1=7.53 if wkswork2==1 & wkswork1==.
		replace wkswork1=21.21 if wkswork2==2 & wkswork1==.
		replace wkswork1=33.53 if wkswork2==3 & wkswork1==.
		replace wkswork1=42.37 if wkswork2==4 & wkswork1==.
		replace wkswork1=48.25 if wkswork2==5 & wkswork1==.
		replace wkswork1=51.82 if wkswork2==6 & wkswork1==.
		replace wkswork1=0 if wkswork2==0 & wkswork1==.
		* annual hours = usual weekly hours x weeks worked
		gen hours=uhrswork*wkswork1
		drop uhrswork wkswork1 wkswork2


		* household-level sample: keep records with relate==1 only
		if `flag_hh_level'==1 {
			keep if relate==1
		}


		****employment
		gen employed=0
		replace employed=1 if empstat==1


		****wage
		rename incwage wage_bill

		****dealing with regions: Dorn's approach (see initial_processing.do)
		rename czone region


		*****dealing with industries
		gen ind_proc=.
		replace ind_proc=1 if ind1990>=10 & ind1990<=32 /*agriculture, forestry, fishing*/
		replace ind_proc=2 if ind1990>=40 & ind1990<=50 /*mining*/
		replace ind_proc=3 if ind1990>=60 & ind1990<=60 /*construction*/
		replace ind_proc=4 if ind1990>=100 & ind1990<=130 /*manuf, nondur, food+tobacco*/
		replace ind_proc=5 if ind1990>=132 & ind1990<=150 /*manuf, nondur, textile mill products*/
		replace ind_proc=6 if ind1990>=151 & ind1990<=152 /*manuf, nondur, apparel and other finished textile*/
		replace ind_proc=7 if ind1990>=160 & ind1990<=162 /*manuf, nondur, paper*/
		replace ind_proc=8 if ind1990>=171 & ind1990<=172 /*manuf, nondur, printing and publishing*/
		replace ind_proc=9 if ind1990>=180 & ind1990<=192 /*manuf, nondur, chemicals*/
		replace ind_proc=10 if ind1990>=200 & ind1990<=201 /*manuf, nondur, petroleum and coal*/
		replace ind_proc=11 if ind1990>=210 & ind1990<=212 /*manuf, nondur, rubber*/
		replace ind_proc=12 if ind1990>=220 & ind1990<=222 /*manuf, nondur, leather*/
		replace ind_proc=13 if ind1990>=230 & ind1990<=241 /*manuf, dur, lumber and wood*/
		replace ind_proc=14 if ind1990>=242 & ind1990<=242 /*manuf, dur, furniture*/
		replace ind_proc=15 if ind1990>=250 & ind1990<=262 /*manuf, dur, stone, clay, glass, concrete*/
		replace ind_proc=16 if ind1990>=270 & ind1990<=301 /*manuf, dur, metal*/
		replace ind_proc=17 if ind1990>=310 & ind1990<=332 /*manuf, dur, machinery and computing*/
		replace ind_proc=18 if ind1990>=340 & ind1990<=350 /*manuf, dur, electrical machinery*/
		replace ind_proc=19 if ind1990>=351 & ind1990<=370 /*manuf, dur, transp equipment*/
		replace ind_proc=20 if ind1990>=371 & ind1990<=381 /*manuf, dur, prof and photo equipment*/
		replace ind_proc=21 if ind1990>=390 & ind1990<=390 /*manuf, dur, toys, sporting goods*/
		replace ind_proc=22 if ind1990>=391 & ind1990<=392 /*manuf, other*/
		replace ind_proc=23 if ind1990>=400 & ind1990<=432 /*transport*/
		replace ind_proc=24 if ind1990>=440 & ind1990<=442 /*communications*/
		replace ind_proc=25 if ind1990>=450 & ind1990<=472 /*utilities*/
		replace ind_proc=26 if ind1990>=500 & ind1990<=532 /*wholesale trade, durables*/
		replace ind_proc=27 if ind1990>=540 & ind1990<=571 /*wholesale trade, nondurables*/
		replace ind_proc=28 if ind1990>=580 & ind1990<=691 /*retail trade*/
		replace ind_proc=29 if ind1990>=700 & ind1990<=712 /*finance, insurance, real estate*/
		replace ind_proc=30 if ind1990>=721 & ind1990<=760 /*business and repair services*/
		replace ind_proc=31 if ind1990>=761 & ind1990<=791 /*personal services*/
		replace ind_proc=32 if ind1990>=800 & ind1990<=810 /*recreation services*/
		replace ind_proc=33 if ind1990>=812 & ind1990<=893 /*prof and related services*/
		replace ind_proc=34 if ind1990>=900 & ind1990<=932 /*public admin*/
		drop if ind1990>=940 & ind1990<=960 /*drop active duty*/
		replace ind_proc=0 if ind1990>=992 & ind1990<=999 /*NA*/
		replace ind_proc=0 if ind1990==0 /*NA*/

		********educ/age_ptf
		* five education bins; records outside them are dropped
		gen ptf_educ=.
		replace ptf_educ=1 if educ>=0 & educ<=5
		replace ptf_educ=2 if educ==6
		replace ptf_educ=3 if educ>=7 & educ<=9
		replace ptf_educ=4 if educ==10
		replace ptf_educ=5 if educ==11
		drop if ptf_educ==.

		* three age bins (25-34, 35-49, 50-64); records outside them are dropped
		gen ptf_age=.
		replace ptf_age=1 if age>=25 & age<35
		replace ptf_age=2 if age>=35 & age<50
		replace ptf_age=3 if age>=50 & age<65
		drop if ptf_age==.

		* demographic portfolio = age bin x education bin; numbered before the
		* married-only restriction is applied
		egen long characteristics_ptf=group(ptf_age ptf_educ)

		if `flag_married_only'==1 {
			keep if marst==1 | marst==2
		}



		*renormalize weights so that they sum to 1,000,000 every year
		egen tot_weight=total(weight)
		replace tot_weight=tot_weight/1000000
		replace weight=weight/tot_weight
		drop tot_weight
		sort region

		* the disability variables that are kept differ between 1980 and later years
		if `year'==1980 {
			keep year weight region age sex race marst educ hours wage_bill ind_proc period price_CPI employed ///
				characteristics_ptf ftotinc disabwrk N_kids ptf_age ptf_educ
		}
		if `year'>1980 {
			keep year weight region age sex race marst educ hours wage_bill ind_proc period price_CPI employed ///
				characteristics_ptf ftotinc diffmob diffcare N_kids ptf_age ptf_educ
		}

		save TEMP_data_`year'_further_processed`sample_suffix', replace
	}
}




********************************************************************************
****STEP 1: CONSTRUCT WAGES AND HOURS FOR EACH YEAR AND GROUP (GEO + DEMO)******
********************************************************************************
* Aggregates each yearly micro file to region x demographic-portfolio cells,
* stacks the three years, computes log changes between consecutive periods and
* attaches period-specific population totals. Output: TEMP_wage_hour_V<suffix>.
if `flag_step1'==1 {

	* Input/output file suffix for the selected sample (married flag takes precedence).
	local sample_suffix ""
	if `flag_hh_level'==1 & `flag_married_only'==0 {
		local sample_suffix "_hh"
	}
	if `flag_married_only'==1 {
		local sample_suffix "_married"
	}

	* Cell identifiers. These, not the year-specific counter V, identify a cell over time.
	local var_list_cross_section "region characteristics_ptf"

	foreach year of numlist `year0' `year1' `year2' {

		use TEMP_data_`year'_further_processed`sample_suffix', clear

		*variable to group by cross-sectionally (geography, demographics)
		* V is numbered within this year's file only, so it is used only for the
		* within-year aggregation below.
		egen long V=group(`var_list_cross_section')

		sort V

		*size of each group
		by V: egen population_V=total(weight)
		*total working hours and wage income in each group
		by V: egen hours_V=total(weight*hours)
		by V: egen wage_bill_V=total(weight*wage_bill)
		by V: egen employment_V=total(weight*employed)
		by V: egen N_obs_V=count(weight)

		* keep one row per cell
		by V: gen id=_n
		drop if id>1
		drop id

		* real wage bill
		gen wage_bill_V_r=wage_bill_V/price_CPI

		gen wage_V_r=wage_bill_V_r/hours_V
		gen wage_empl_V_r=wage_bill_V_r/employment_V
		gen hours_per_capita_V=hours_V/population_V
		gen employment_per_capita_V=employment_V/population_V
		gen hours_per_employed_V=hours_V/employment_V

		save TEMP_step1_`year', replace
	}



	**now construct growth rates by period
	use TEMP_step1_`year0', clear
	append using TEMP_step1_`year1'
	append using TEMP_step1_`year2'

	drop if period==.

	* Panel key: the cell identifiers themselves. The group counter V is created
	* separately in every year, so the same V can denote different cells in
	* different years whenever a cell is absent in one of them.
	sort `var_list_cross_section' period
	by `var_list_cross_section' period: gen id=_n
	drop if id>1
	drop id

	* Log change relative to the previous period of the same cell. The condition
	* restricts this to consecutive periods, so a cell that lacks the
	* intermediate period gets a missing value instead of a longer-horizon change.
	local level_vars employment_V employment_per_capita_V hours_V hours_per_capita_V ///
		hours_per_employed_V wage_V_r wage_empl_V_r wage_bill_V_r
	foreach v of local level_vars {
		by `var_list_cross_section': gen gr_`v'=log(`v'/`v'[_n-1]) if period==period[_n-1]+1
	}

	* raw observation counts behind the period-2 change: current period (N_obs_V2),
	* previous period (N_obs_V1) and the smaller of the two (N_obs_V_min)
	by `var_list_cross_section': gen N_obs_V_prev=N_obs_V[_n-1] if period==period[_n-1]+1
	gen N_obs_V2=.
	replace N_obs_V2=N_obs_V if period==2
	gen N_obs_V1=.
	replace N_obs_V1=N_obs_V_prev if period==2
	gen N_obs_V_min=N_obs_V2
	replace N_obs_V_min=N_obs_V1 if N_obs_V1<N_obs_V2



	keep `var_list_cross_section' gr* population_V period  ptf_age ptf_educ N_obs_V1 N_obs_V2 N_obs_V_min


	/*create period-specific populations (cell, region total, portfolio total), to be merged with final dataset*/
	forvalues p=0/2 {
		preserve
			keep `var_list_cross_section' period population_V
			keep if period==`p'
			drop period
			sort region characteristics_ptf
			by region: egen population_reg_`p'=total(population_V)
			sort characteristics_ptf region
			by characteristics_ptf: egen population_ptf_`p'=total(population_V)
			rename population_V population_V_`p'
			save temps`p', replace
		restore
	}

	forvalues p=0/2 {
		merge m:1 `var_list_cross_section' using temps`p'
		drop _merge
		cap erase temps`p'.dta
	}

	save TEMP_wage_hour_V`sample_suffix', replace


	cap erase TEMP_step1_`year0'.dta
	cap erase TEMP_step1_`year1'.dta
	cap erase TEMP_step1_`year2'.dta
}
		
		

	
********************************************************************************
**************STEP 2: INDUSTRY SHARES BY REGION/DEMOGRAPHICS********************
********************************************************************************
* For every year, computes the share of each industry in the wage bill, hours
* and employment of (a) each region x demographic-portfolio cell (share_*_ind_V)
* and (b) each demographic portfolio nationally (share_*_ind_ptf).
* Output: TEMP_initial_wage_shares<suffix>.
if `flag_step2'==1 {

	* Input/output file suffix for the selected sample (married flag takes precedence).
	local sample_suffix ""
	if `flag_hh_level'==1 & `flag_married_only'==0 {
		local sample_suffix "_hh"
	}
	if `flag_married_only'==1 {
		local sample_suffix "_married"
	}

	local var_list_cross_section "region ind_proc characteristics_ptf"

	foreach year of numlist `year0' `year1' `year2' {

		use TEMP_data_`year'_further_processed`sample_suffix', clear

		* records coded ind_proc==0 (industry not available) do not enter the shares
		drop if ind_proc==0
		*variable to group by cross-sectionally (geography, industry, demographics)
		egen long V=group(`var_list_cross_section')

		sort V

		by V: egen wage_bill_V=total(weight*wage_bill)
		by V: egen hours_V=total(weight*hours)
		by V: egen employment_V=total(weight*employed)
		by V: egen N_obs_V=count(weight)
		* keep one row per region x industry x portfolio cell
		by V: gen id=_n
		drop if id>1
		drop id

		gen wage_bill_V_r=wage_bill_V/price_CPI

		keep if period!=.
		* industry shares within each region x portfolio cell
		sort region characteristics_ptf
		by region characteristics_ptf: egen wage_bill_V_r_total=total(wage_bill_V_r)
		by region characteristics_ptf: egen hours_V_total=total(hours_V)
		by region characteristics_ptf: egen employment_V_total=total(employment_V)
		gen share_wage_bill_ind_V=wage_bill_V_r/wage_bill_V_r_total
		gen share_hours_ind_V=hours_V/hours_V_total
		gen share_employment_ind_V=employment_V/employment_V_total


		*construct industry shares by characteristics portfolios: share_..._ptf is industry share on demographic group level, share_..._V is on demographic-region level
		sort characteristics_ptf ind_proc
		by characteristics_ptf ind_proc: egen wage_bill_V_r_mean_ptf=mean(wage_bill_V_r)
		by characteristics_ptf ind_proc: egen wage_bill_V_r_sum_ptf=total(wage_bill_V_r)

		by characteristics_ptf ind_proc: egen hours_V_mean_ptf=mean(hours_V)
		by characteristics_ptf ind_proc: egen hours_V_sum_ptf=total(hours_V)

		by characteristics_ptf ind_proc: egen employment_V_mean_ptf=mean(employment_V)
		by characteristics_ptf ind_proc: egen employment_V_sum_ptf=total(employment_V)

		sort characteristics_ptf

		* Portfolio-wide denominator. The data still hold one row per region, so
		* summing the within-industry MEAN over all rows of a portfolio adds each
		* industry total exactly once; summing the *_sum_ptf variables instead
		* would count every industry once per region.
		by characteristics_ptf: egen wage_bill_V_r_sum_ptf_total=total(wage_bill_V_r_mean_ptf)
		by characteristics_ptf: egen hours_V_sum_ptf_total=total(hours_V_mean_ptf)
		by characteristics_ptf: egen employment_V_sum_ptf_total=total(employment_V_mean_ptf)

		gen share_wage_bill_ind_ptf=wage_bill_V_r_sum_ptf/wage_bill_V_r_sum_ptf_total
		gen share_hours_ind_ptf=hours_V_sum_ptf/hours_V_sum_ptf_total
		gen share_employment_ind_ptf=employment_V_sum_ptf/employment_V_sum_ptf_total



		keep `var_list_cross_section' share* period

		sort region ind_proc characteristics_ptf
		save TEMP_step2_`year', replace
	}

	* stack the three years; period tells them apart
	use TEMP_step2_`year0', clear
	append using TEMP_step2_`year1'
	append using TEMP_step2_`year2'

	save TEMP_initial_wage_shares`sample_suffix', replace

	cap erase TEMP_step2_`year0'.dta
	cap erase TEMP_step2_`year1'.dta
	cap erase TEMP_step2_`year2'.dta
}






	

	
********************************************************************************
*********STEP 3: NATIONAL LEVEL WAGE GROWTH BY INDUSTRY/DEMOGRAPHICS************
********************************************************************************
* Aggregates each yearly micro file to industry x demographic-portfolio cells
* (all regions pooled), stacks the years and computes log changes between
* consecutive periods. Output: TEMP_wage_hour_industry_US<suffix>.
if `flag_step3'==1 {

	* Input/output file suffix for the selected sample (married flag takes precedence).
	local sample_suffix ""
	if `flag_hh_level'==1 & `flag_married_only'==0 {
		local sample_suffix "_hh"
	}
	if `flag_married_only'==1 {
		local sample_suffix "_married"
	}

	* Cell identifiers. These, not the year-specific counter V, identify a cell over time.
	local var_list_cross_section "ind_proc characteristics_ptf"

	foreach year of numlist `year0' `year1' `year2' {

		use TEMP_data_`year'_further_processed`sample_suffix', clear

		drop if ind_proc==0
		drop if characteristics_ptf==0

		*variable to group by cross-sectionally (industry, demographics)
		* V is numbered within this year's file only, so it is used only for the
		* within-year aggregation below.
		egen long V=group(`var_list_cross_section')

		sort V

		*size of each group
		by V: egen population_V=total(weight)
		*total working hours and wage income in each group
		by V: egen hours_V=total(weight*hours)
		by V: egen employment_V=total(weight*employed)
		by V: egen wage_bill_V=total(weight*wage_bill)
		by V: egen N_obs_V=count(weight)

		* keep one row per cell
		by V: gen id=_n
		drop if id>1
		drop id

		gen wage_bill_V_r=wage_bill_V/price_CPI
		gen wage_V_r=wage_bill_V_r/hours_V
		gen wage_empl_V_r=wage_bill_V_r/employment_V

		sort V

		save TEMP_step3_`year', replace
	}


	**now construct growth rates by period
	use TEMP_step3_`year0', clear
	append using TEMP_step3_`year1'
	append using TEMP_step3_`year2'

	keep if period!=.

	* Panel key: the cell identifiers themselves. The group counter V is created
	* separately in every year, so the same V can denote different cells in
	* different years whenever a cell is absent in one of them.
	sort `var_list_cross_section' period
	by `var_list_cross_section' period: gen id=_n
	drop if id>1

	* Log change relative to the previous period of the same cell, for
	* consecutive periods only.
	by `var_list_cross_section': gen gr_wage_r_ind_US=log(wage_V_r/wage_V_r[_n-1]) if period==period[_n-1]+1
	by `var_list_cross_section': gen gr_wage_empl_r_ind_US=log(wage_empl_V_r/wage_empl_V_r[_n-1]) if period==period[_n-1]+1
	by `var_list_cross_section': gen gr_wage_bill_r_ind_US=log(wage_bill_V_r/wage_bill_V_r[_n-1]) if period==period[_n-1]+1
	by `var_list_cross_section': gen gr_hours_ind_US=log(hours_V/hours_V[_n-1]) if period==period[_n-1]+1
	by `var_list_cross_section': gen gr_employment_ind_US=log(employment_V/employment_V[_n-1]) if period==period[_n-1]+1

	keep period gr* `var_list_cross_section' population_V

	save TEMP_wage_hour_industry_US`sample_suffix', replace

	* short pause before deleting the intermediate files
	sleep 500
	cap erase TEMP_step3_`year0'.dta
	cap erase TEMP_step3_`year1'.dta
	cap erase TEMP_step3_`year2'.dta

}

	
	


********************************************************************************
**************************STEP 4: CONTROLS**************************************
********************************************************************************
* Builds, for every region x demographic-portfolio cell, the weighted fractions
* of several demographic indicators in period 0 and their changes up to period 2.
* Output: TEMP_regional_controls<suffix>.
if `flag_step4'==1 {

	* Input/output file suffix for the selected sample (married flag takes precedence).
	local sample_suffix ""
	if `flag_hh_level'==1 & `flag_married_only'==0 {
		local sample_suffix "_hh"
	}
	if `flag_married_only'==1 {
		local sample_suffix "_married"
	}

	* Cell identifiers. These, not the year-specific counter V, identify a cell over time.
	local var_list_cross_section "region characteristics_ptf"

	foreach year of numlist `year0' `year1' `year2' {

		use TEMP_data_`year'_further_processed`sample_suffix', clear

		* person-level indicators; a value that fits neither branch stays missing
		gen male=.
		replace male=1 if sex==1
		replace male=0 if sex==2

		gen old=.
		replace old=1 if age>=50
		replace old=0 if age<50

		gen young=.
		replace young=1 if age<=35
		replace young=0 if age>35

		gen college_educ=.
		replace college_educ=1 if educ>=10 & educ<=11
		replace college_educ=0 if educ<10

		gen HS_and_less_educ=.
		replace HS_and_less_educ=1 if educ<7
		replace HS_and_less_educ=0 if educ>=7

		gen white=.
		replace white=1 if race==1
		replace white=0 if race!=1

		gen some_kids=.
		replace some_kids=1 if N_kids>0
		replace some_kids=0 if N_kids==0

		gen one_kid=.
		replace one_kid=1 if N_kids==1
		replace one_kid=0 if N_kids!=1

		gen two_kids=.
		replace two_kids=1 if N_kids==2
		replace two_kids=0 if N_kids!=2

		gen many_kids=.
		replace many_kids=1 if N_kids>=3
		replace many_kids=0 if N_kids<3

		gen married=.
		replace married=1 if marst==1 | marst==2
		replace married=0 if marst>=3 & marst<=6

		* the available disability variables differ between 1980 and later years
		if `year'==1980 {
			gen disab80=0
			replace disab80=1 if disabwrk>1
		}
		if `year'>1980 {
			gen disab_mob=0
			replace disab_mob=1 if diffmob==2
			gen disab_care=0
			replace disab_care=1 if diffcare==2
		}


		drop if male==.
		drop if young==.
		drop if old==.
		drop if college_educ==.
		drop if HS_and_less_educ==.
		drop if white==.
		drop if married==.



		*variable to group by cross-sectionally (geography, demographics)
		* V is numbered within this year's file only, so it is used only for the
		* within-year aggregation below.
		egen long V=group(`var_list_cross_section')

		sort V

		* weighted head counts per cell
		by V: egen population_V=total(weight)
		by V: egen male_V=total(weight*(male==1))
		by V: egen white_V=total(weight*(white==1))
		by V: egen married_V=total(weight*(married==1))
		by V: egen some_kids_V=total(weight*(some_kids==1))
		by V: egen one_kid_V=total(weight*(one_kid==1))
		by V: egen two_kids_V=total(weight*(two_kids==1))
		by V: egen many_kids_V=total(weight*(many_kids==1))

		if `year'==1980 {
			by V: egen disab80_V=total(weight*(disab80==1))
		}
		if `year'>1980 {
			by V: egen disab_mob_V=total(weight*(disab_mob==1))
			by V: egen disab_care_V=total(weight*(disab_care==1))

		}

		* keep one row per cell
		by V: gen id=_n
		drop if id>1
		drop id

		save TEMP_step4_`year', replace
	}


	**now construct growth rates by period
	use TEMP_step4_`year0', clear
	append using TEMP_step4_`year1'
	append using TEMP_step4_`year2'

	* floor applied to fractions that are exactly zero, so that logs are defined
	local numb=0.00001

	* Panel key: the cell identifiers themselves. The group counter V is created
	* separately in every year, so the same V can denote different cells in
	* different years whenever a cell is absent in one of them.
	sort `var_list_cross_section' period
	by `var_list_cross_section' period: egen population_V_sum=total(population_V)
	local list2 male white married disab80 disab_mob disab_care some_kids one_kid two_kids many_kids
	foreach var of local list2 {
		* a disability count that does not exist for a year is missing after the
		* append and is treated as zero by total()
		by `var_list_cross_section' period: egen `var'_V_sum=total(`var'_V)
		gen frac_`var'_V=`var'_V_sum/population_V_sum
		replace frac_`var'_V=`numb' if frac_`var'_V==0
	}



	sort `var_list_cross_section' period
	by `var_list_cross_section' period: gen id=_n
	drop if id>1
	drop id

	preserve
		foreach var of local list2 {
			* log change and simple difference relative to the previous period of
			* the same cell, for consecutive periods only
			by `var_list_cross_section': gen gr_frac_`var'_V=log(frac_`var'_V/frac_`var'_V[_n-1]) if period==period[_n-1]+1
			by `var_list_cross_section': gen gr_frac2_`var'_V=frac_`var'_V-frac_`var'_V[_n-1] if period==period[_n-1]+1
			* NOTE(review): the fractions are stored as float, so a floored value
			* compares as slightly below the literal used here and these two
			* lines appear to blank the log change of floored cells; confirm intent.
			by `var_list_cross_section': replace gr_frac_`var'_V=. if frac_`var'_V<`numb'
			by `var_list_cross_section': replace gr_frac_`var'_V=. if frac_`var'_V[_n-1]<`numb'
		}
		* disab80 is only built for 1980, so its change is dropped
		drop gr_frac_disab80_V
		drop gr_frac2_disab80_V
		keep if period==2
		keep period gr_frac* `var_list_cross_section'
		save TEMP_gr_controls, replace
	restore


	* levels from period 0, changes from the period-2 rows saved above
	keep if period==0
	drop frac_disab_mob_V frac_disab_care_V
	merge 1:1 `var_list_cross_section' using TEMP_gr_controls
	cap erase TEMP_gr_controls.dta
	drop _merge



	keep period `var_list_cross_section' frac* gr_frac*


	sort region characteristics_ptf period

	save TEMP_regional_controls`sample_suffix', replace

	cap erase TEMP_step4_`year0'.dta
	cap erase TEMP_step4_`year1'.dta
	cap erase TEMP_step4_`year2'.dta
}




********************************************************************************
*********************************REGRESSIONS************************************
********************************************************************************
* Assembles the estimation sample from the TEMP_* files: shift-share instruments
* (period-0 industry shares x national industry growth), cell-level outcomes and
* wages, regional controls, the CEX price file and manufacturing-share controls.

* File suffix for the selected sample (married flag takes precedence).
local sample_suffix ""
if `flag_hh_level'==1 & `flag_married_only'==0 {
	local sample_suffix "_hh"
}
if `flag_married_only'==1 {
	local sample_suffix "_married"
}

use TEMP_initial_wage_shares`sample_suffix', clear


*keep only initial period
keep if period==0
*replace period to 2 for merging
replace period=2

merge m:1 period ind_proc characteristics_ptf using TEMP_wage_hour_industry_US`sample_suffix'

drop _merge
keep if period==2 /*keep only period=2. Shares are at initial period 0, growth rates are between periods 2 and 1 */


sort region characteristics_ptf ind_proc period

local list wage_r wage_bill_r wage_empl_r hours employment


*create weighting: the wage-bill share also weights hourly wages and wages per employed
rename share_wage_bill_ind_V share_wage_bill_r_ind_V
gen share_wage_r_ind_V=share_wage_bill_r_ind_V
gen share_wage_empl_r_ind_V=share_wage_bill_r_ind_V
foreach var of local list {
	gen gr_`var'_ind_US_w=gr_`var'_ind_US*share_`var'_ind_V
}


sort region characteristics_ptf period ind_proc


* instrument = sum over industries of share x national growth; total() treats
* a missing industry term as zero
foreach var of local list {
	by region characteristics_ptf: egen gr_`var'_IV=total(gr_`var'_ind_US_w)
}
* keep one row per region x portfolio cell
by region characteristics_ptf: gen id=_n
drop if id>1

keep region period characteristics_ptf gr_wage_bill_r_IV gr_wage_r_IV gr_hours_IV gr_wage_empl_r_IV gr_employment_IV
sort region period characteristics_ptf



merge 1:m region characteristics_ptf period using TEMP_wage_hour_V`sample_suffix'


drop _merge
sort region characteristics_ptf period
order period region characteristics_ptf



keep if period==2



merge m:1 region characteristics_ptf using TEMP_regional_controls`sample_suffix'

drop _merge


* the price file is keyed by income_ptf, which is set equal to the demographic portfolio
gen income_ptf=characteristics_ptf

if `flag_married_only'==0 {
	merge m:1 income_ptf using "../CEX_data/rec_price_IV"
}
if `flag_married_only'==1 {
	merge m:1 income_ptf using "../CEX_data/rec_price_IV_married_only"
}

drop _merge

* outcomes (y_*), regressors (x_*) and instruments (*_IV)
gen y_h_per_employed=gr_hours_per_employed_V
gen y_h_empl=gr_employment_per_capita_V
gen y_h=gr_hours_per_capita_V
gen x_w=gr_wage_V_r
gen x_w_empl=gr_wage_empl_V_r
gen x_w_IV=gr_wage_bill_r_IV-gr_hours_IV
gen x_w_IV2=gr_wage_r_IV
gen x_w_empl_IV=gr_wage_bill_r_IV-gr_employment_IV


*add some more controls at characteristics portfolio level
preserve
	use TEMP_initial_wage_shares`sample_suffix', clear

	keep if period==0
	* the portfolio-level shares are repeated on every region row; keep one row
	* per industry x portfolio
	sort ind_proc characteristics_ptf share_hours_ind_ptf
	by ind_proc characteristics_ptf share_hours_ind_ptf: gen id=_n
	drop if id>1
	drop region id

	keep ind_proc characteristics_ptf share_wage_bill_ind_ptf share_hours_ind_ptf share_employment_ind_ptf
	reshape wide share*, i(characteristics_ptf) j(ind_proc)
	* an industry absent from a portfolio has share zero
	forvalues i=1(1)34 {
		replace share_wage_bill_ind_ptf`i'=0 if share_wage_bill_ind_ptf`i'==.
		replace share_hours_ind_ptf`i'=0 if share_hours_ind_ptf`i'==.
		replace share_employment_ind_ptf`i'=0 if share_employment_ind_ptf`i'==.
	}
	* industries 4-12 are nondurable and 13-22 durable manufacturing
	gen share_hours_manuf_nd=share_hours_ind_ptf4+share_hours_ind_ptf5+share_hours_ind_ptf6+share_hours_ind_ptf7+share_hours_ind_ptf8+share_hours_ind_ptf9+ ///
		share_hours_ind_ptf10+share_hours_ind_ptf11+share_hours_ind_ptf12
	gen share_hours_manuf_d=share_hours_ind_ptf13+share_hours_ind_ptf14+share_hours_ind_ptf15+share_hours_ind_ptf16+share_hours_ind_ptf17+ ///
		share_hours_ind_ptf18+share_hours_ind_ptf19+share_hours_ind_ptf20+share_hours_ind_ptf21+share_hours_ind_ptf22
	gen share_hours_manuf_total=share_hours_manuf_nd+share_hours_manuf_d
	save dop_controls, replace
restore
merge m:1 characteristics_ptf using dop_controls
drop _merge
cap erase dop_controls.dta

*robustness: also compute manuf share at demographic-locality level
preserve
	use TEMP_initial_wage_shares`sample_suffix', clear

	keep if period==0
	gen flag_manuf_nd=0
	replace flag_manuf_nd=1 if ind_proc>=4 & ind_proc<=12
	gen flag_manuf_d=0
	replace flag_manuf_d=1 if ind_proc>=13 & ind_proc<=22
	keep if flag_manuf_nd==1 | flag_manuf_d==1

	sort region characteristics_ptf
	by region characteristics_ptf: egen share_hours_manuf_nd_V=total(share_hours_ind_V*flag_manuf_nd)
	by region characteristics_ptf: egen share_hours_manuf_d_V=total(share_hours_ind_V*flag_manuf_d)
	* keep one row per region x portfolio cell
	sort region characteristics_ptf share_hours_manuf_nd_V share_hours_manuf_d_V
	by region characteristics_ptf share_hours_manuf_nd_V share_hours_manuf_d_V: gen id=_n
	drop if id>1

	keep region characteristics_ptf share_hours_manuf_nd_V share_hours_manuf_d_V
	gen share_hours_manuf_total_V=share_hours_manuf_nd_V+share_hours_manuf_d_V
	save dop_controls, replace
restore
merge m:1 region characteristics_ptf using dop_controls
* drop the merge indicator, as after every other merge, so that it cannot
* collide with a later merge
drop _merge

cap erase dop_controls.dta


*share_hours_manuf_total: only variation across demographic groups
*share_hours_manuf_total_V: variation across demo and localities
* cells without any manufacturing row were not matched above: their share is zero
replace share_hours_manuf_d_V=0 if share_hours_manuf_d_V==.
replace share_hours_manuf_nd_V=0 if share_hours_manuf_nd_V==.
replace share_hours_manuf_total_V=0 if share_hours_manuf_total_V==.






*main analysis: use frac_married as control
* (the names below are abbreviations of the *_V variables)
if `flag_hh_level'==0 {
	drop frac_one_kid frac_two_kids frac_many_kids frac_some_kids
	drop gr_frac_one_kid gr_frac_two_kids gr_frac_many_kids gr_frac_some_kids
	drop gr_frac2_one_kid gr_frac2_two_kids gr_frac2_many_kids gr_frac2_some_kids
}


*household heads/married analysis: use additional number-of-kids controls
if `flag_hh_level'==1 {
	drop frac_some_kids gr_frac_some_kids gr_frac2_some_kids
	drop frac_married gr_frac_married gr_frac2_married
}




* 1 = run the single-regressor regressions, 0 = run the joint specifications
local flag_app_tables=0


*drop region-demographic bins with few observations
drop if N_obs_V_min<50

**OLS
* NOTE(review): x_p, x_p_IV, y_c and y_d are not generated anywhere in this
* file; presumably they come from the CEX price file merged earlier - confirm.

* shared pieces of the regression commands
local fe_se_ols "absorb(region) vce(cluster region)"
local fe_se_iv "absorb(region) cluster(region)"
local ctrl_demo "frac* gr_frac2*"
local ctrl_all "share_hours_manuf_total_V frac* gr_frac2*"
* 1 for the default sample (all persons), 0 otherwise
local is_baseline = (`flag_hh_level'==0 & `flag_married_only'==0)

* single-regressor specifications: wages first, then prices
if `flag_app_tables'==1 {
	foreach rhs in x_w x_p {
		* the stored wage results are cleared before the price regressions start
		if "`rhs'"=="x_p" {
			eststo clear
		}
		eststo: reghdfe y_h `rhs', `fe_se_ols'
		eststo: reghdfe y_h `rhs' `ctrl_demo', `fe_se_ols'
		eststo: reghdfe y_h `rhs' `ctrl_all', `fe_se_ols'
		eststo: ivreghdfe y_h (`rhs'=`rhs'_IV), `fe_se_iv'
		eststo: ivreghdfe y_h (`rhs'=`rhs'_IV) `ctrl_demo', `fe_se_iv'
		eststo: ivreghdfe y_h (`rhs'=`rhs'_IV) `ctrl_all', `fe_se_iv'
	}
	do final_processing

}

* joint specifications with prices and wages together
if `flag_app_tables'==0 {

	* the specification without controls is run for the default sample only
	if `is_baseline' {
		eststo: reghdfe y_h x_p x_w, `fe_se_ols'
	}
	eststo: reghdfe y_h x_p x_w `ctrl_demo', `fe_se_ols'
	eststo: reghdfe y_h x_p x_w `ctrl_all', `fe_se_ols'


	**IV
	if `is_baseline' {
		eststo: ivreghdfe y_h (x_p x_w=x_w_IV x_p_IV), `fe_se_iv'
	}
	eststo: ivreghdfe y_h (x_p x_w=x_w_IV x_p_IV) `ctrl_demo', `fe_se_iv'
	eststo: ivreghdfe y_h (x_p x_w=x_w_IV x_p_IV) `ctrl_all', `fe_se_iv'


}




*gmm

* divide the changes by the number of years
* NOTE(review): the window is hard-coded as 1990-2016 (27 years) while the last
* sample year above is 2018 - confirm this is intended.
local N=2016-1990+1
foreach v in y_c y_d y_h x_w x_p {
	replace `v'=`v'/`N'
}

* weighting-matrix and variance options shared by every gmm call
local gmm_common "winitial(identity) wmatrix(cluster characteristics_ptf region) vce(cluster characteristics_ptf region)"

* three-equation system, regressors used as their own instruments
eststo: gmm (eq1: y_c-({beta_w}+1)*x_w-{beta_p}*x_p-{const_y_c}) ///
	(eq2: y_d-({beta_w}+1)*x_w-({beta_p}-1)*x_p-{const_y_d}) ///
	(eq3: y_h-{beta_w}*x_w-{beta_p}*x_p-{const_y_h}), ///
	instruments(eq1: x_w x_p) ///
	instruments(eq2: x_w x_p) ///
	instruments(eq3: x_w x_p) ///
	`gmm_common' twostep level(90)
estat overid


* same system with the shift-share instruments: two-step for the married
* household-head sample, iterated GMM for every other sample
local iv_estimator "igmm"
if `flag_hh_level'==1 & `flag_married_only'==1 {
	local iv_estimator "twostep"
}
eststo: gmm (eq1: y_c-({beta_w}+1)*x_w-{beta_p}*x_p-{const_y_c}) ///
	(eq2: y_d-({beta_w}+1)*x_w-({beta_p}-1)*x_p-{const_y_d}) ///
	(eq3: y_h-{beta_w}*x_w-{beta_p}*x_p-{const_y_h}), ///
	instruments(eq1: x_w_IV x_p_IV) ///
	instruments(eq2: x_w_IV x_p_IV) ///
	instruments(eq3: x_w_IV x_p_IV) ///
	`gmm_common' `iv_estimator' level(90)
estat overid

* default sample only: variant in which all three equations share one constant
if `is_baseline' {
	eststo: gmm (eq1: y_c-({beta_w}+1)*x_w-{beta_p}*x_p-{const_y_h}) ///
		(eq2: y_d-({beta_w}+1)*x_w-({beta_p}-1)*x_p-{const_y_h}) ///
		(eq3: y_h-{beta_w}*x_w-{beta_p}*x_p-{const_y_h}), ///
		instruments(eq1: x_w_IV x_p_IV) ///
		instruments(eq2: x_w_IV x_p_IV) ///
		instruments(eq3: x_w_IV x_p_IV) ///
		`gmm_common' twostep level(90)
	estat overid
}



